{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Test Suites and Reports for Bicycle Demand Prediction"
   ],
   "metadata": {
    "id": "pqMl9Ouxf6yn"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import requests\n",
    "import zipfile\n",
    "import io\n",
    "\n",
    "from datetime import datetime, time\n",
    "from sklearn import datasets, ensemble\n",
    "\n",
    "from evidently import ColumnMapping\n",
    "\n",
    "from evidently.report import Report\n",
    "from evidently.metric_preset import DataDriftPreset, RegressionPreset\n",
    "\n",
    "from evidently.test_suite import TestSuite\n",
    "from evidently.test_preset import DataDriftTestPreset, RegressionTestPreset\n",
    "from evidently.tests import TestValueMeanError, TestValueMAE, TestValueRMSE"
   ],
   "outputs": [],
   "metadata": {
    "id": "8FP6JHGUf6ys"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "import warnings\n",
    "\n",
    "# Silence every warning category so library notices do not clutter the cell outputs.\n",
    "warnings.simplefilter(action='ignore', category=Warning)"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Bicycle Demand Data"
   ],
   "metadata": {
    "id": "x3OWHRQ0f6yv"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Download the UCI Bike Sharing archive and read the hourly table straight from memory.\n",
    "# TLS certificate verification is left at the requests default (enabled), and the timeout\n",
    "# keeps the cell from hanging forever on a stalled connection.\n",
    "response = requests.get(\n",
    "    \"https://archive.ics.uci.edu/static/public/275/bike+sharing+dataset.zip\",\n",
    "    timeout=60,\n",
    ")\n",
    "response.raise_for_status()  # fail loudly on an HTTP error instead of passing an error page to zipfile\n",
    "content = response.content\n",
    "\n",
    "# Both the archive and the member file handle are closed when the block exits.\n",
    "with zipfile.ZipFile(io.BytesIO(content)) as arc, arc.open(\"hour.csv\") as hour_csv:\n",
    "    raw_data = pd.read_csv(hour_csv, header=0, sep=',', parse_dates=['dteday'], index_col='dteday')"
   ],
   "outputs": [],
   "metadata": {
    "id": "uMLMel0Cf6yw"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Fold the hour-of-day column into the date index so every row carries a full timestamp.\n",
    "# This is vectorized index arithmetic instead of a Python-level call per row.\n",
    "# normalize() resets the index to midnight first, so re-running the cell gives the same result.\n",
    "raw_data.index = raw_data.index.normalize() + pd.to_timedelta(raw_data['hr'].to_numpy(), unit='h')"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "raw_data.head()"
   ],
   "outputs": [],
   "metadata": {
    "id": "pNwLFltuf6yx"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Regression Model"
   ],
   "metadata": {
    "id": "4VypH7uAf6yz"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Model training"
   ],
   "metadata": {
    "id": "9YMdccRpf6y3"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "target = 'cnt'\n",
    "prediction = 'prediction'\n",
    "numerical_features = ['temp', 'atemp', 'hum', 'windspeed', 'hr', 'weekday']\n",
    "categorical_features = ['season', 'holiday', 'workingday']"
   ],
   "outputs": [],
   "metadata": {
    "id": "zbp6euUtf6y3"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# .copy() gives each window its own DataFrame, so the prediction column added further down\n",
    "# is never written into a slice of raw_data (the situation pandas flags with SettingWithCopyWarning).\n",
    "reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00'].copy()\n",
    "current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00'].copy()"
   ],
   "outputs": [],
   "metadata": {
    "id": "T8jbMDVwf6y4"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "reference.head()"
   ],
   "outputs": [],
   "metadata": {
    "id": "SQLQTJy7f6y4",
    "scrolled": true
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "regressor = ensemble.RandomForestRegressor(random_state = 0, n_estimators = 50)"
   ],
   "outputs": [],
   "metadata": {
    "id": "im2Bqd3zf6y5"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "regressor.fit(reference[numerical_features + categorical_features], reference[target])"
   ],
   "outputs": [],
   "metadata": {
    "id": "bt_5Kfu-f6y5"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Score both windows with the fitted forest, using the same column order as in training.\n",
    "feature_columns = numerical_features + categorical_features\n",
    "\n",
    "ref_prediction = regressor.predict(reference[feature_columns])\n",
    "current_prediction = regressor.predict(current[feature_columns])"
   ],
   "outputs": [],
   "metadata": {
    "id": "-7b4UNq2f6y6"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Store the predictions under the configured column name so the frames stay in sync\n",
    "# with the `prediction` variable that is handed to the column mapping.\n",
    "reference[prediction] = ref_prediction\n",
    "current[prediction] = current_prediction"
   ],
   "outputs": [],
   "metadata": {
    "id": "tt79FK5Mf6y6"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Model Performance"
   ],
   "metadata": {
    "id": "4i2oy0k1f6y7"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Tell Evidently which columns hold the target, the prediction and each feature type.\n",
    "column_mapping = ColumnMapping(\n",
    "    target=target,\n",
    "    prediction=prediction,\n",
    "    numerical_features=numerical_features,\n",
    "    categorical_features=categorical_features,\n",
    ")"
   ],
   "outputs": [],
   "metadata": {
    "id": "Rg0JipUif6y7"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Baseline quality check: evaluate the model on the same window it was fitted on.\n",
    "regression_performance_report = Report(metrics=[RegressionPreset()])\n",
    "regression_performance_report.run(\n",
    "    reference_data=None,\n",
    "    current_data=reference,\n",
    "    column_mapping=column_mapping,\n",
    ")"
   ],
   "outputs": [],
   "metadata": {
    "id": "fFGbeOJjf6y8"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "regression_performance_report.show()"
   ],
   "outputs": [],
   "metadata": {
    "id": "KSon-d2pf6y8",
    "scrolled": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "#regression_performance_report.save_html('regression_performance_at_training.html')"
   ],
   "outputs": [],
   "metadata": {
    "id": "uVckFZGmf6y8"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Week 1"
   ],
   "metadata": {
    "id": "62qRRKmhf6y9"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Week 1: run the error checks on the first monitoring window (29 Jan - 7 Feb).\n",
    "week_1 = current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00']\n",
    "\n",
    "regression_performance_test = TestSuite(\n",
    "    tests=[\n",
    "        TestValueMeanError(lte=10, gte=-10),  # mean error bounded to [-10, 10]\n",
    "        TestValueMAE(lte=15),  # mean absolute error of at most 15\n",
    "    ]\n",
    ")\n",
    "regression_performance_test.run(reference_data=None, current_data=week_1, column_mapping=column_mapping)\n",
    "regression_performance_test"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "regression_performance_test.as_dict()"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Week 2"
   ],
   "metadata": {
    "id": "xnrzxRRnf6y_"
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Week 2: 8 - 14 Feb. The window starts on 8 Feb because 7 Feb is already covered by the\n",
    "# Week 1 window; starting a day earlier would score that day in both weeks.\n",
    "regression_performance_test = TestSuite(tests=[\n",
    "    TestValueMeanError(lte=10, gte=-10),  # mean error bounded to [-10, 10]\n",
    "    TestValueMAE(lte=15),  # mean absolute error of at most 15\n",
    "])\n",
    "\n",
    "regression_performance_test.run(reference_data=None, \n",
    "                                current_data=current.loc['2011-02-08 00:00:00':'2011-02-14 23:00:00'],\n",
    "                                column_mapping=column_mapping)\n",
    "regression_performance_test"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Week 3"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Week 3: run the same error checks on the 15 - 21 Feb window.\n",
    "week_3 = current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00']\n",
    "\n",
    "regression_performance_test = TestSuite(\n",
    "    tests=[\n",
    "        TestValueMeanError(lte=10, gte=-10),  # mean error bounded to [-10, 10]\n",
    "        TestValueMAE(lte=15),  # mean absolute error of at most 15\n",
    "    ]\n",
    ")\n",
    "regression_performance_test.run(reference_data=None, current_data=week_3, column_mapping=column_mapping)\n",
    "regression_performance_test"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "markdown",
   "source": [
    "## What has happened?"
   ],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Compare model quality on the 15 - 21 Feb window against the reference window.\n",
    "report_window = current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00']\n",
    "\n",
    "regression_report = Report(metrics=[RegressionPreset()])\n",
    "regression_report.run(\n",
    "    reference_data=reference,\n",
    "    current_data=report_window,\n",
    "    column_mapping=column_mapping,\n",
    ")\n",
    "regression_report"
   ],
   "outputs": [],
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "source": [
    "# Check whether the data in the 15 - 21 Feb window has drifted away from the reference window.\n",
    "drift_window = current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00']\n",
    "\n",
    "drift_report = Report(metrics=[DataDriftPreset()])\n",
    "drift_report.run(\n",
    "    reference_data=reference,\n",
    "    current_data=drift_window,\n",
    "    column_mapping=column_mapping,\n",
    ")\n",
    "drift_report"
   ],
   "outputs": [],
   "metadata": {
    "id": "ra77EvULf6zF"
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# Support Evidently\n",
    "Enjoyed the tutorial? Star Evidently on GitHub to contribute back! This helps us continue creating free open-source tools for the community. https://github.com/evidentlyai/evidently"
   ],
   "metadata": {}
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [
    "xnrzxRRnf6y_",
    "BAXuFSrTf6zC"
   ],
   "name": "bicycle_demand_monitoring.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
